The Effect of Maternal Tobacco Smoke Exposure on the Placental Transcriptome

Study description

This study is about….

Data cleaning and wrangling

# Load the gzipped raw NCBI series file from the project's data folder
ncbi_data <- here("data/01_ncbi_data.tsv.gz") |>
  read_tsv()
# Preview the first 10 rows
ncbi_data |>
  head(10) |>
  print()
# A tibble: 10 × 77
   col_1    col_2 col_3 col_4 col_5 col_6 col_7 col_8 col_9 col_10 col_11 col_12
   <chr>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <chr>  <chr> 
 1 !Series… Effe… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 2 !Series… GSE1… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 3 !Series… Publ… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 4 !Series… Sep … <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 5 !Series… Jan … <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 6 !Series… 2009… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 7 !Series… Smok… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 8 !Series… The … <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
 9 !Series… Expr… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
10 !Series… Hana… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>   <NA>   <NA>  
# ℹ 65 more variables: col_13 <chr>, col_14 <chr>, col_15 <chr>, col_16 <chr>,
#   col_17 <chr>, col_18 <chr>, col_19 <chr>, col_20 <chr>, col_21 <chr>,
#   col_22 <chr>, col_23 <chr>, col_24 <chr>, col_25 <chr>, col_26 <chr>,
#   col_27 <chr>, col_28 <chr>, col_29 <chr>, col_30 <chr>, col_31 <chr>,
#   col_32 <chr>, col_33 <chr>, col_34 <chr>, col_35 <chr>, col_36 <chr>,
#   col_37 <chr>, col_38 <chr>, col_39 <chr>, col_40 <chr>, col_41 <chr>,
#   col_42 <chr>, col_43 <chr>, col_44 <chr>, col_45 <chr>, col_46 <chr>, …
# Load the cleaned sample-characteristics table
characteristics_data <- here("data/02_dat_clean_characteristics.tsv") |>
  read_tsv()
# Preview the first 10 rows
characteristics_data |>
  head(10) |>
  print()
# A tibble: 10 × 8
   sample_id age_years status     maternal_bmi parity gestational_age_weeks
   <chr>         <dbl> <chr>             <dbl>  <dbl>                 <dbl>
 1 GSM451162        32 non-smoker         25.7      1                    39
 2 GSM451163        28 non-smoker         18.8      1                    37
 3 GSM451164        38 non-smoker         21.3      2                    36
 4 GSM451165        34 non-smoker         23.5      3                    41
 5 GSM451166        27 smoker             22        2                    42
 6 GSM451167        31 non-smoker         31.8      2                    39
 7 GSM451168        31 non-smoker         25.6      1                    39
 8 GSM451169        32 non-smoker         19.8      1                    39
 9 GSM451170        32 non-smoker         18.6      2                    41
10 GSM451171        33 non-smoker         23.7      3                    41
# ℹ 2 more variables: mode_of_delivery <chr>, placental_volume_cm3 <dbl>
# Load the cleaned gene-expression table
genes_data <- here("data/02_dat_clean_genes.tsv") |>
  read_tsv()
# Preview the first 10 rows
genes_data |>
  head(10) |>
  print()
# A tibble: 10 × 11,156
   sample_id ilmn_1343291 ilmn_1343292 ilmn_1343293 ilmn_1343294 ilmn_1651228
   <chr>            <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
 1 GSM451162         149.         149.         259.         70.0         249.
 2 GSM451163         162.         162.         329.         97.0         160.
 3 GSM451164         156.         156.         302.         81.8         140.
 4 GSM451165         155.         155.         248.         67.8         198.
 5 GSM451166         130.         130.         270.         77.6         223.
 6 GSM451167         153.         153.         257.         69.9         202.
 7 GSM451168         170.         170.         254.         70.3         207.
 8 GSM451169         206.         206.         265.         69.6         217.
 9 GSM451170         146.         146.         274.         73.4         389.
10 GSM451171         154.         154.         297.         73.4         149.
# ℹ 11,150 more variables: ilmn_1651229 <dbl>, ilmn_1651237 <dbl>,
#   ilmn_1651254 <dbl>, ilmn_1651261 <dbl>, ilmn_1651262 <dbl>,
#   ilmn_1651268 <dbl>, ilmn_1651278 <dbl>, ilmn_1651282 <dbl>,
#   ilmn_1651286 <dbl>, ilmn_1651296 <dbl>, ilmn_1651336 <dbl>,
#   ilmn_1651346 <dbl>, ilmn_1651354 <dbl>, ilmn_1651358 <dbl>,
#   ilmn_1651364 <dbl>, ilmn_1651375 <dbl>, ilmn_1651378 <dbl>,
#   ilmn_1651385 <dbl>, ilmn_1651399 <dbl>, ilmn_1651405 <dbl>, …
# Load the cleaned, merged table (sample characteristics plus gene expression)
clean_data <- here("data/02_dat_clean.tsv") |>
  read_tsv()
# Preview the first 10 rows
clean_data |>
  head(10) |>
  print()
# A tibble: 10 × 11,163
   sample_id age_years status     maternal_bmi parity gestational_age_weeks
   <chr>         <dbl> <chr>             <dbl>  <dbl>                 <dbl>
 1 GSM451162        32 non-smoker         25.7      1                    39
 2 GSM451163        28 non-smoker         18.8      1                    37
 3 GSM451164        38 non-smoker         21.3      2                    36
 4 GSM451165        34 non-smoker         23.5      3                    41
 5 GSM451166        27 smoker             22        2                    42
 6 GSM451167        31 non-smoker         31.8      2                    39
 7 GSM451168        31 non-smoker         25.6      1                    39
 8 GSM451169        32 non-smoker         19.8      1                    39
 9 GSM451170        32 non-smoker         18.6      2                    41
10 GSM451171        33 non-smoker         23.7      3                    41
# ℹ 11,157 more variables: mode_of_delivery <chr>, placental_volume_cm3 <dbl>,
#   ilmn_1343291 <dbl>, ilmn_1343292 <dbl>, ilmn_1343293 <dbl>,
#   ilmn_1343294 <dbl>, ilmn_1651228 <dbl>, ilmn_1651229 <dbl>,
#   ilmn_1651237 <dbl>, ilmn_1651254 <dbl>, ilmn_1651261 <dbl>,
#   ilmn_1651262 <dbl>, ilmn_1651268 <dbl>, ilmn_1651278 <dbl>,
#   ilmn_1651282 <dbl>, ilmn_1651286 <dbl>, ilmn_1651296 <dbl>,
#   ilmn_1651336 <dbl>, ilmn_1651346 <dbl>, ilmn_1651354 <dbl>, …

Description of data

Analysis of data

# Collect the names of the gene-expression columns in merged_data, i.e. every
# column whose name begins with "ilmn"
gene_cols <- names(merged_data)[grepl("^ilmn", names(merged_data))]

# Per-gene log2 fold change of mean expression, smokers relative to
# non-smokers. Reads `merged_data` (needs a `status` column plus the columns
# named in `gene_cols`) and yields one row per gene holding the group means
# and `log2_fold_change`.
merged_log2_fold <- merged_data |>
  group_by(status) |>
  # Mean expression of every gene within each status group, ignoring NAs.
  # An anonymous function is used because forwarding `na.rm` through the
  # `...` of across() is deprecated (dplyr >= 1.1.0).
  summarise(across(all_of(gene_cols), \(x) mean(x, na.rm = TRUE))) |>
  # Long format: one row per status/gene pair
  pivot_longer(cols = -status,
               names_to = "gene",
               values_to = "mean_expr") |>
  # Wide format: one row per gene, one mean column per status value
  pivot_wider(names_from = status,
              values_from = mean_expr) |>
  # log2 of the ratio between the smoker mean and the non-smoker mean
  mutate(log2_fold_change = log2(`smoker` / `non-smoker`))
 
# Per-gene t-test p-value comparing expression between the status groups
p_values <- merged_data |>
  # Stack the gene columns: one row per original row and gene
  pivot_longer(
    cols = all_of(gene_cols),
    names_to = "gene",
    values_to = "expression"
  ) |>
  group_by(gene) |>
  # Within each gene, test expression against status and keep only the p-value
  summarise(p_value = t.test(expression ~ status)[["p.value"]])

Analysis of data

  • Bulletpoints

::: {.column width="40%"}
Venstre

::::

Analysis of data

Venstre

Højre

Analysis of data

Sætter billede i midten

:::: {.columns}

Venstre

Højre

::::

Her skrives der i kursiv

Analysis of data

Skriv her for at skrive i midten øverst på slidet

Venstre

Højre

Conclusion

:::: {.columns}

Venstre

Højre